/* File 1 of 3 for Table 5 */


#delimit ;
* From here on every command, and every comment that starts with a star, ends at a semicolon;
clear all;

* Stem used in the name of every results file saved by this do-file;
local outfile "GWgapr10_pfindyr_TL";
set more off;


* Writing the date and time at which the run started to the log;
di _n "$S_DATE $S_TIME";









********************************************************************************;
* Setting up locals for regs;

* type selects the labour specification and lab is a copy of it. lab is used below
* to pick the matching L_ expression, the weight variable and the output file name;
local type fte_del; 
local lab `type'; 

******************************;
* Specifying the labour input;	
	
* head count*(average fte)^delta;

* The two locals below hold the labour aggregate as text that is pasted into the nlsur
* equations, one for the production function (pf) and one for the wage bill equation (wb).
* Names in curly brackets are nlsur parameters, and name=value sets the starting value.
* Each phi enters as sqrt(phi^2), which is its absolute value, so the multiplier cannot
* go negative. Males aged 25to39 carry no phi multiplier and so act as the reference group,
* and the same phi_f multiplies the female head count in every age band;
	
local L_fte_del_pf = "(
	sqrt({phi_lt25_pf=0.5}^2)  *(L_hc_m_lt25  *(av_fte_m_lt25)^{delta_pf=1} + sqrt({phi_f_pf=0.9}^2)*L_hc_f_lt25  *(av_fte_f_lt25)^{delta_pf})
	+ 							(L_hc_m_25to39*(av_fte_m_25to39)^{delta_pf} + sqrt({phi_f_pf}^2)    *L_hc_f_25to39*(av_fte_f_25to39)^{delta_pf})
	+ sqrt({phi_40to54_pf=1}^2)*(L_hc_m_40to54*(av_fte_m_40to54)^{delta_pf} + sqrt({phi_f_pf}^2)    *L_hc_f_40to54*(av_fte_f_40to54)^{delta_pf})
	+ sqrt({phi_55p_pf=1}^2)   *(L_hc_m_55p   *(av_fte_m_55p)^{delta_pf}    + sqrt({phi_f_pf}^2)    *L_hc_f_55p   *(av_fte_f_55p)^{delta_pf})
	)";
	
local L_fte_del_wb = "(
	sqrt({phi_lt25_wb=0.5}^2)  *(L_hc_m_lt25  *(av_fte_m_lt25)^{delta_wb=1} + sqrt({phi_f_wb=0.9}^2)*L_hc_f_lt25  *(av_fte_f_lt25)^{delta_wb})
	+ 							(L_hc_m_25to39*(av_fte_m_25to39)^{delta_wb} + sqrt({phi_f_wb}^2)    *L_hc_f_25to39*(av_fte_f_25to39)^{delta_wb})
	+ sqrt({phi_40to54_wb=1}^2)*(L_hc_m_40to54*(av_fte_m_40to54)^{delta_wb} + sqrt({phi_f_wb}^2)    *L_hc_f_40to54*(av_fte_f_40to54)^{delta_wb})
	+ sqrt({phi_55p_wb=1}^2)   *(L_hc_m_55p   *(av_fte_m_55p)^{delta_wb}    + sqrt({phi_f_wb}^2)    *L_hc_f_55p   *(av_fte_f_55p)^{delta_wb})
	)";

* NOTE(review): vars_fte is not defined anywhere in the visible part of this do-file, so
* vars_fte_del looks to be empty here, and it is not used again below - confirm before relying on it;
local vars_fte_del "`vars_fte'";

* Description of the labour input for the notes of the final results file. The trailing l in
* the local name is deliberate, the notes command at the end reads it back as L_ + lab + l;
local L_fte_dell "Sum of head count*(average FTEs)^{delta} for all labour types";

	


	
	
	

********************************************************************************;
* Reading in data;

* using obs with hc of 5+ that are in prody data;

* Only observations with Q_rest equal to 1 are loaded from the firm file;
use if Q_rest==1 using GWgap_pr_firm_v4, clear;


* Dropping every variable with ten in its name and every variable starting with INpf_;
drop *ten* INpf_*;


* Creating one dummy per value of ind2, with names starting with IN;
xi i.ind2, pre(IN); * for 2-digit ANZSIC industry fe = 74 industries;


* con is a column of ones. It is listed explicitly as the constant in both nlsur equations;
gen con = 1; 
	
* Expanding the control list into full variable names and storing it in the local extra_ctls.
* The regression loop later keeps only those that vary within each industry and year;
unab extra_ctls: multiplant WPc_* INind*;






*******************************************************************************;
* Setting up matrices to collect coefficients etc;


* In regression only letting things vary by gender share, collect:
* 3 kinds of matrices. C and SE are built once per industry with one row per year,
* Obs is built once with one row per industry and year

* Matrix C (coefficient matrix, one per industry):
* * rows: one per year
* * col 1: year
* * col 2-4: coeffs on lnM, lnK, lnL in production fn for industry
* * col 5-7 (prev 5): coeffs on lnM, lnK, lnL in wage fn for industry
* * col 8-9 (prev 6-7): phi_fs in production fn and wage fn in industry 
* * col 10-15: phi_ages in pf fn and then in wb fn
* * col 16-17: deltas in pf and wb

* Matrix SE (standard error matrix, one per industry):
* * rows: one per year
* * col 1: year
* * col 2-4: se's on lnM, lnK, lnL in production fn for industry
* * col 5-7 (prev 5): se's on lnM, lnK, lnL in wage fn for industry
* * col 8-9 (prev 6-7): se's on phi_fs in production fn and wage fn in industry year
* * col 10 (prev 8): se on (1 - phi_wb/phi_pf)
* * col 11-16: ses on phi_ages in pf fn and then in wb fn
* * col 17-18: ses on deltas in pf and wb  

* Matrix Obs (observations matrix):
* * row name: industry code
* * col 1: year
* * col 2: number of observations in industry and year


* creating locals for the list of industries and the list of years, and for the number of 
elements in each;

levelsof pf_ind, local(ind3s);
levelsof year, local(years);
local num_ind3: list sizeof ind3s;
local num_year: list sizeof years;

* one row of Obs for every industry and year combination;
local matrows = `num_ind3'*`num_year';


* Obs holds the year in column 1 and the number of firms in column 2.
* Rows are filled industry by industry, and each row is named after its industry;

matrix Obs = J(`matrows', 2, .);
matrix colnames Obs = year obs;

local obs_rownames "";
local r = 0;
foreach ind of local ind3s {;
	foreach year of local years {;

		local ++r;
		local obs_rownames "`obs_rownames' `ind'";

		* con is never missing, so counting rows gives the same N as summarising con;
		qui count if pf_ind=="`ind'" & year==`year';
		matrix Obs[`r',2] = r(N);
		matrix Obs[`r',1] = `year';

	};
};

matrix rownames Obs = `obs_rownames';


* writing Obs out as a dataset, with the row names stored in ind3, for the merge at the end;

preserve;
drop _all;
svmat2 Obs, names(col) rnames(ind3); 
save temp_Obs, replace;
restore;




* Choosing the regression weight that matches the labour specification held in lab.
* The head count spec uses L_hc_t, every FTE or hours based spec uses L_fte_t;
local wgtvar "";
if "`lab'"=="hc" local wgtvar L_hc_t;
if inlist("`lab'", "fte", "ftpt", "fte_del", "hrs", "hrs_del") local wgtvar L_fte_t;

* Stopping here when lab is not recognised, instead of failing later inside nlsur
* with an empty aweight expression;
if "`wgtvar'"=="" {;
	di as error "No weight variable is defined for labour specification `lab'";
	exit 198;
};





* Main estimation loop. For every industry, and within it every year, the production
* function and wage bill equations are estimated jointly with nlsur. The phi and delta
* estimates and their standard errors go into one row per year of C_ind and SE_ind,
* which are then saved as two intermediate datasets per industry. Rows of years that
* are skipped keep the year in column 1 and missing values everywhere else;

foreach ind in `ind3s' {; * looping over pf_ind industries;
		
	matrix define C_`ind' = J(`num_year',17,.);
	matrix define SE_`ind' = J(`num_year',18,.);
	matrix colnames C_`ind' = year lnM_pf lnK_pf lnL_pf lnM_wb lnK_wb lnL_wb phi_f_pf phi_f_wb
		phi_lt25_pf phi_40to54_pf phi_55p_pf phi_lt25_wb phi_40to54_wb phi_55p_wb delta_pf delta_wb;
	matrix colnames SE_`ind' = year lnM_pf lnK_pf lnL_pf lnM_wb lnK_wb lnL_wb phi_f_pf phi_f_wb discrim
		phi_lt25_pf phi_40to54_pf phi_55p_pf phi_lt25_wb phi_40to54_wb phi_55p_wb delta_pf delta_wb;

	* row counts the position of the year within the years list, so the matrices are
	* filled correctly whatever the first year in the data is;
	local row 0;

	foreach year in `years' {;
	
		local ++row;

		matrix C_`ind'[`row',1] = `year';
		matrix SE_`ind'[`row',1] = `year';
			
		
		di as input _n _n "$S_DATE $S_TIME: Starting regression for `ind', `year'";
		
		* only running if at least 40 obs;
		qui sum lngo if pf_ind=="`ind'" & year==`year';
		if r(N)<40 {;
			di _n "Skipping `ind', `year' because <40 observations" _n;
			continue;
		};
		
				
		
		* keeping only the subset of controls that vary within industry in question;
		local extra_ctls2 "";
		foreach var in `extra_ctls' {;
			qui sum `var' if pf_ind=="`ind'" & year==`year';
			if r(sd)>0 & r(sd)<. local extra_ctls2 "`extra_ctls2' `var'";
		};
		
		* listing industry FE in extra_ctls2;
		
		local indvars "";
		local nonindvars "";
		foreach var in `extra_ctls2' {;
			if strpos("`var'","INind")==1 local indvars "`indvars' `var'";
			if strpos("`var'","INind")!=1 local nonindvars "`nonindvars' `var'";
		};
		
		* omitting the first industry FE;
		
		local ind1: word 1 of `indvars';
		local indvars: list indvars - ind1;
		local extra_ctls2: list nonindvars | indvars;
	

		
		* regs;			
			
		capture noisily nlsur (lngo = {lnL_pf=1}*ln(`L_`lab'_pf')	
				+ {lnLsq_pf=1}*ln(`L_`lab'_pf')*ln(`L_`lab'_pf')/100
				+ {lnL_lnK_pf=1}*ln(`L_`lab'_pf')*lnK/100
				+ {lnL_lnM_pf=1}*ln(`L_`lab'_pf')*lnM/100
			
				+ {lnK_pf=1}*lnK
				+ {lnKsq_pf=1}*lnKsq
				+ {lnK_lnM_pf=1}*lnK_lnM
				
				+ {lnM_pf=1}*lnM
				+ {lnMsq_pf=1}*lnMsq
				+ {xb_pf: `extra_ctls2' con})
			(lnWB = {lnL_wb=1}*ln(`L_`lab'_wb')	
				+ {lnLsq_wb=1}*ln(`L_`lab'_wb')*ln(`L_`lab'_wb')/100
				+ {lnL_lnK_wb=1}*ln(`L_`lab'_wb')*lnK/100
				+ {lnL_lnM_wb=1}*ln(`L_`lab'_wb')*lnM/100
			
				+ {lnK_wb=1}*lnK
				+ {lnKsq_wb=1}*lnKsq
				+ {lnK_lnM_wb=1}*lnK_lnM
				
				+ {lnM_wb=1}*lnM
				+ {lnMsq_wb=1}*lnMsq
				+ {xb_wb: `extra_ctls2' con})
			[aweight = `wgtvar']
 			if pf_ind=="`ind'" & year==`year', cluster(pent) iter(200); 

		* keeping the return code before any other command can overwrite it;
		local nlsur_rc = _rc;

		* skipping when nlsur itself failed. In that case e() may still hold the results
		* of an earlier regression, and those must not be stored against this industry and year;
		if `nlsur_rc'!=0 {;
			di _n "Not saving results for `ind', `year' because nlsur exited with return code `nlsur_rc'" _n;
			continue;
		};
			
		di as input "Command just run: " e(cmdline);
		
		* not saving output if the regression did not converge;
		if e(converged)!=1 {;
			di _n "Not saving results for `ind', `year' because it did not converge in 200 iterations" _n;
			continue;
		};
		
		* saving regression results to matrices;
		matrix C_`ind'[`row',8] = _b[/phi_f_pf];
		matrix C_`ind'[`row',9] = _b[/phi_f_wb];
		matrix C_`ind'[`row',10] = _b[/phi_lt25_pf];
		matrix C_`ind'[`row',11] = _b[/phi_40to54_pf];
		matrix C_`ind'[`row',12] = _b[/phi_55p_pf];
		matrix C_`ind'[`row',13] = _b[/phi_lt25_wb];
		matrix C_`ind'[`row',14] = _b[/phi_40to54_wb];
		matrix C_`ind'[`row',15] = _b[/phi_55p_wb];

		* the deltas are stored under capture because not every labour spec has them;
		capture matrix C_`ind'[`row',16] = _b[/delta_pf];
		capture matrix C_`ind'[`row',17] = _b[/delta_wb];
		
		

		matrix SE_`ind'[`row',8] = _se[/phi_f_pf];
		matrix SE_`ind'[`row',9] = _se[/phi_f_wb];

		* delta method standard error of the discrimination measure 1 - phi_f_wb/phi_f_pf;
		nlcom 1 - _b[/phi_f_wb]/_b[/phi_f_pf];
		matrix V = r(V);
		local se = V[1,1]^0.5;
		matrix SE_`ind'[`row',10] = `se';
		
		matrix SE_`ind'[`row',11] = _se[/phi_lt25_pf];
		matrix SE_`ind'[`row',12] = _se[/phi_40to54_pf];
		matrix SE_`ind'[`row',13] = _se[/phi_55p_pf];
		matrix SE_`ind'[`row',14] = _se[/phi_lt25_wb];
		matrix SE_`ind'[`row',15] = _se[/phi_40to54_wb];
		matrix SE_`ind'[`row',16] = _se[/phi_55p_wb];
		capture matrix SE_`ind'[`row',17] = _se[/delta_pf];
		capture matrix SE_`ind'[`row',18] = _se[/delta_wb];
		
	};
	
	matrix list C_`ind';
	matrix list SE_`ind';


	* one time stamp shared by the coefficient and the standard error file of this industry.
	* The two files are later merged on created, so the stamps have to be identical;
	local stamp "$S_DATE $S_TIME";

	preserve;
	
	* saving results for industry;
	
	drop _all;
	svmat2 C_`ind', names(col);
	gen ind3 = "`ind'";
	gen created = "`stamp'";
	notes: `outfile'_`ind'_c.dta is an intermediate file of regression coeffs;
	*capture append using `outfile'_`ind'_c;
	save `outfile'_`ind'_c, replace;
	
	drop _all;
	svmat2 SE_`ind', names(col);
	gen ind3 = "`ind'";
	gen created = "`stamp'";
	notes: `outfile'_`ind'_se.dta is an intermediate file of regression ses;
	*capture append using `outfile'_`ind'_se;
	save `outfile'_`ind'_se, replace;	
	
	restore;

};	






* converting saved results into a data set;

* Stacking the standard error files of all industries. The stack starts from an empty
* dataset, so no particular industry file has to exist to act as a template;

clear;
foreach ind in `ind3s' {;
	append using `outfile'_`ind'_se;
};

* prefixing every column with se_, except the three identifiers;
rename * se_*;
rename se_year year;
rename se_ind3 ind3;
rename se_created created;
duplicates drop;
drop if year==.;

* truncating created variable to exclude seconds and ones of minutes;
* the length is taken row by row, so stamps of different lengths are cut correctly;

replace created = substr(created, 1, length(created) - 4); 

save temp_SE, replace;

* Stacking the coefficient files of all industries. The stack starts from an empty
* dataset, so no particular industry file has to exist to act as a template;

clear;
foreach ind in `ind3s' {;
	append using `outfile'_`ind'_c;
};

* prefixing every column with c_, except the three identifiers;
rename * c_*;
rename c_year year;
rename c_ind3 ind3;
rename c_created created;
duplicates drop;
drop if year==.;

* truncating created variable to exclude seconds and ones of minutes;
* the length is taken row by row, so stamps of different lengths are cut correctly;

replace created = substr(created, 1, length(created) - 4);
save temp_C, replace;

* attaching the standard errors. Every row has to find its match, which is the case
* exactly when the mean of the merge indicator equals 3;

merge 1:1 ind3 year created using temp_SE;

qui sum _m;
assert r(mean)==3;
#delimit ;

* checking no year-industries are duplicates;

* dup counts the other rows with the same year and ind3, so a mean of zero means none exist;
duplicates tag year ind3, gen(dup);
qui sum dup;
assert r(mean)==0;

* merging number of obs;

* the assert below requires every row on both sides to match, as the merge indicator
* can only average 3 when all of its values are 3;
capture drop _m;
merge 1:1 year ind3 using temp_Obs;
qui sum _m;
assert r(mean)==3;

* confidentialises observation count;
*grr obs;
*drop obs;
rename obs rr3obs; * delete this line when uncommenting two above;

label var rr3obs "Number of observations in regression, rr3";


* point estimate of the discrimination measure. Its standard error was computed with
* nlcom in the regression loop and arrives here as se_discrim;
gen c_discrim_f = 1 - c_phi_f_wb/c_phi_f_pf;
label var c_discrim_f "Estimated discrimination against females, 1 - phi_f_wb/phi_f_pf";
rename se_discrim se_discrim_f;
label var se_discrim_f "Standard error on estimated discrimination against females";
label var year "Financial year";
label var ind3 "Productivity industry";

* dropping the lnM, lnK and lnL columns, which the regression loop never fills in,
* together with the merge indicator and the duplicates flag;
drop c_lnM_pf c_lnK_pf c_lnL_pf c_lnM_wb c_lnK_wb c_lnL_wb se_lnM_pf se_lnK_pf se_lnL_pf 
	se_lnM_wb se_lnK_wb se_lnL_wb _merge dup;
order ind3 year rr3obs c_discrim_f se_discrim_f;
sort ind3 year;


* documenting the results file in its notes and saving it under the outfile stem plus _res_ plus lab;
notes: `outfile'_res_`lab' created on $S_DATE $S_TIME.;
notes: `outfile'_res_`lab' is a set of regression results generated by `outfile'.do.
The regressions are run separately by productivity industry X year, and the phi_fs in the production and
wage equations DO vary by year. The regessions include phi_f and age
phis, but not interacted. Year FE and anzsic 2-digit industry FE are included.;
notes: The labour specification is `type': `L_`lab'l'.;
notes: Regressions are weighted by total FTEs.;
save `outfile'_res_`lab', replace;





